library(tidyverse)
library(plotly)
library(sf)
library(mapview)
library(tigris)
library(censusapi)
library(leaflet)
library(lehdr)
library(usmap)
options(
tigris_class = "sf",
tigris_use_cache = TRUE
)
Sys.setenv(CENSUS_KEY="10dcd73d7c043e91bac9fb8d3989cbff54b08790")
# load in income data - code adapted from other students
bay_median_income_by_block <-
pullCensus("B19013_001E", bay_area_counties) %>%
filter(blockgroup %in% bay_sd$origin_census_block_group) %>%
rename(
Median_Income = B19013_001E
) %>%
filter(!is.na(Median_Income)) %>%
left_join(bay_sd_at_home_average, by = c("blockgroup" = "origin_census_block_group")) %>%
filter(!is.na(device_count))
bay_ami_by_block <-
pullCensus("group(B19001)", bay_area_counties) %>%
dplyr::select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
filter(blockgroup %in% bay_sd$origin_census_block_group) %>%
group_by(blockgroup) %>%
summarize(
Total = B19001_001E,
`Under 75,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E),
#sum(lapply(2:12, function(x) as.name(paste0("B19001_00",x,"E"))))
`Under 100,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E),
`Under 125,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E, B19001_014E),
`Under 150,000` = sum(B19001_002E, B19001_003E, B19001_004E, B19001_005E, B19001_006E, B19001_007E, B19001_008E, B19001_009E, B19001_010E, B19001_011E, B19001_012E, B19001_013E, B19001_014E, B19001_015E)
) %>%
mutate(
`% under 75,000` = `Under 75,000` / Total * 100,
`% over 75,000` = (100 - `% under 75,000`),
`% under 100,000` = `Under 100,000` / Total * 100,
`% over 100,000` = (100 - `% under 100,000`),
`% under 125,000` = `Under 125,000` / Total * 100,
`% over 125,000` = (100 - `% under 125,000`),
`% under 150,000` = `Under 150,000` / Total * 100,
`% over 150,000` = (100 - `% under 150,000`),
) %>%
left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)
) %>%
filter(!is.na(device_count))
# plotting
bay_ami_by_block %>%
ggplot(aes(
x = `% over 75,000`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $75,000 annually",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Households Above $75,000"
)
income_75_model <- lm(`% Not Completely at Home` ~ `% over 75,000`, bay_ami_by_block)
summary(income_75_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 75,000`, data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.905 -5.551 -0.528 4.965 42.829
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.686852 0.407889 153.69 <2e-16 ***
## `% over 75,000` -0.153156 0.006411 -23.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.52 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1077, Adjusted R-squared: 0.1075
## F-statistic: 570.7 on 1 and 4728 DF, p-value: < 2.2e-16
# income - less than $100000
bay_ami_by_block %>%
ggplot(aes(
x = `% over 100,000`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $100,000 annually",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Households Above $100,000"
)
income_100_model <- lm(`% Not Completely at Home` ~ `% over 100,000`, bay_ami_by_block)
summary(income_100_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 100,000`, data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -23.799 -5.575 -0.628 4.788 44.767
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.086199 0.315331 193.72 <2e-16 ***
## `% over 100,000` -0.156528 0.005921 -26.44 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.419 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1288, Adjusted R-squared: 0.1286
## F-statistic: 699 on 1 and 4728 DF, p-value: < 2.2e-16
# income - less than $125000
bay_ami_by_block %>%
ggplot(aes(
x = `% over 125,000`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $125,000 annually",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Households Above $125,000"
)
income_125_model <- lm(`% Not Completely at Home` ~ `% over 125,000`, bay_ami_by_block)
summary(income_125_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 125,000`, data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -25.719 -5.535 -0.563 4.693 46.825
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 59.88233 0.26223 228.36 <2e-16 ***
## `% over 125,000` -0.16507 0.00592 -27.88 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.358 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1412, Adjusted R-squared: 0.141
## F-statistic: 777.5 on 1 and 4728 DF, p-value: < 2.2e-16
# income - less than $150000
bay_ami_by_block %>%
ggplot(aes(
x = `% over 150,000`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $150,000 annually",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Households Above $150,000"
)
income_150_model <- lm(`% Not Completely at Home` ~ `% over 150,000`, bay_ami_by_block)
summary(income_150_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `% over 150,000`, data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.131 -5.471 -0.559 4.771 45.293
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 58.746787 0.229335 256.2 <2e-16 ***
## `% over 150,000` -0.170958 0.006217 -27.5 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.375 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1379, Adjusted R-squared: 0.1377
## F-statistic: 756.1 on 1 and 4728 DF, p-value: < 2.2e-16
Compare to pre-shelter-in-place behavior:
bay_ami_by_block %>%
ggplot(aes(
x = `% over 75,000`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $75,000 annually",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Households Above $75,000 Pre Shelter-in-Place"
)
income_75_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 75,000`, bay_ami_by_block)
summary(income_75_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 75,000`,
## data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.095 -2.942 0.245 3.290 20.435
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.965396 0.239540 296.26 <2e-16 ***
## `% over 75,000` 0.113036 0.003765 30.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.003 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1601, Adjusted R-squared: 0.1599
## F-statistic: 901.3 on 1 and 4728 DF, p-value: < 2.2e-16
# income - less than $100000
bay_ami_by_block %>%
ggplot(aes(
x = `% over 100,000`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $100,000 annually",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Households Above $100,000 Pre Shelter-in-Place"
)
income_100_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 100,000`, bay_ami_by_block)
summary(income_100_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 100,000`,
## data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.2334 -2.9109 0.3057 3.3078 18.9262
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.473764 0.186304 389.01 <2e-16 ***
## `% over 100,000` 0.108863 0.003498 31.12 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.974 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.17, Adjusted R-squared: 0.1699
## F-statistic: 968.6 on 1 and 4728 DF, p-value: < 2.2e-16
# income - less than $125000
bay_ami_by_block %>%
ggplot(aes(
x = `% over 125,000`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $125,000 annually",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Social Distancing and Households Above $125,000 Pre Shelter-in-Place"
)
income_125_model <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 125,000`, bay_ami_by_block)
summary(income_125_model)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 125,000`,
## data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.3512 -2.8027 0.3572 3.2505 17.9572
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.442753 0.155512 472.26 <2e-16 ***
## `% over 125,000` 0.111451 0.003511 31.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.957 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1757, Adjusted R-squared: 0.1755
## F-statistic: 1008 on 1 and 4728 DF, p-value: < 2.2e-16
# income - less than $150000
bay_ami_by_block %>%
ggplot(aes(
x = `% over 150,000`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $150,000 annually",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Social Distancing and Households Above $150,000 Pre Shelter-in-Place"
)
income_150_model <- lm(`% Not Completely at Home Pre Shelter` ~ `% over 150,000`, bay_ami_by_block)
summary(income_150_model)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% over 150,000`,
## data = bay_ami_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.373 -2.878 0.360 3.302 17.222
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74.178083 0.135838 546.08 <2e-16 ***
## `% over 150,000` 0.116426 0.003683 31.61 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 4.96 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1745, Adjusted R-squared: 0.1743
## F-statistic: 999.5 on 1 and 4728 DF, p-value: < 2.2e-16
# loading in language data - code adapted from other students
bay_lang_by_block <-
pullCensus("group(B16004)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
- blockgroup
) %>%
left_join(acs_vars, by = c("variable" = "name")) %>%
mutate(
tier = substr(label,lapply(label, function(x) max(unlist(gregexpr('!!',x)))+2),nchar(label))
) %>%
filter(tier %in% c('Speak English "not well"',
'Speak English "not at all"',
'Total', 'Speak Spanish',
'Speak Asian and Pacific Island languages')) %>%
group_by(blockgroup, tier) %>%
summarise(
estimate1 = sum(estimate)
) %>%
spread(
key = "tier",
value = "estimate1"
) %>%
mutate(
`% speaking english < well` = (`Speak English "not well"` + `Speak English "not at all"`) / Total * 100,
`% speaking english > well` = (100 - `% speaking english < well`),
`% speaking spanish` = (`Speak Spanish`/ Total) * 100,
`% not speaking spanish` = (100 - `% speaking spanish`),
`% speaking api` = (`Speak Asian and Pacific Island languages` / Total) * 100
) %>%
left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)) %>%
filter(!is.na(device_count)) %>%
mutate(log_perc = log(`% speaking english < well`))
# plotting
bay_lang_by_block %>%
ggplot(aes(
x = `% speaking english > well`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals speaking English well",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and English Language Ability"
)
english_ability_model <- lm(`% Not Completely at Home` ~ `% speaking english > well`, bay_lang_by_block)
summary(english_ability_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `% speaking english > well`,
## data = bay_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.907 -5.878 -0.330 5.561 40.918
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.156680 1.438832 37.639 <2e-16 ***
## `% speaking english > well` -0.007796 0.015520 -0.502 0.615
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.068 on 4734 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 5.33e-05, Adjusted R-squared: -0.0001579
## F-statistic: 0.2523 on 1 and 4734 DF, p-value: 0.6155
bay_lang_by_block %>%
ggplot(aes(
x = `% not speaking spanish`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals not speaking Spanish",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Spanish Language Ability"
)
spanish_speaking_model <- lm(`% Not Completely at Home` ~ `% not speaking spanish`, bay_lang_by_block)
summary(spanish_speaking_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `% not speaking spanish`,
## data = bay_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.653 -5.692 -0.521 5.032 41.760
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 63.418350 0.628664 100.88 <2e-16 ***
## `% not speaking spanish` -0.118836 0.007327 -16.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.826 on 4734 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.05264, Adjusted R-squared: 0.05244
## F-statistic: 263 on 1 and 4734 DF, p-value: < 2.2e-16
Compare to pre-shelter-in-place behavior:
bay_lang_by_block %>%
ggplot(aes(
x = `% speaking english > well`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals speaking English well",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and English Language Ability Pre Shelter-in-Place"
)
english_ability_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% speaking english > well`, bay_lang_by_block)
summary(english_ability_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% speaking english > well`,
## data = bay_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.866 -3.136 0.362 3.676 14.914
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.829284 0.837774 73.80 <2e-16 ***
## `% speaking english > well` 0.173148 0.009037 19.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.279 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.07199, Adjusted R-squared: 0.0718
## F-statistic: 367.1 on 1 and 4732 DF, p-value: < 2.2e-16
bay_lang_by_block %>%
ggplot(aes(
x = `% not speaking spanish`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals not speaking Spanish",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Spanish Language Ability Pre Shelter-in-Place"
)
spanish_speaking_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% not speaking spanish`, bay_lang_by_block)
summary(spanish_speaking_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% not speaking spanish`,
## data = bay_lang_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.848 -3.159 0.413 3.603 13.869
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 70.818907 0.376268 188.21 <2e-16 ***
## `% not speaking spanish` 0.083275 0.004386 18.99 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.283 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.0708, Adjusted R-squared: 0.07061
## F-statistic: 360.6 on 1 and 4732 DF, p-value: < 2.2e-16
# loading in age data - specifically looking at percentage 65+ and percentage <30
bay_age_by_block <-
pullCensus("group(B01001)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(
key = "variable",
value = "estimate",
- blockgroup
) %>%
mutate(
label = acs_vars$label[match(variable,acs_vars$name)]
) %>%
select(-variable) %>%
separate(
label,
into = c(NA,NA,"sex","age"),
sep = "!!"
) %>% filter(!is.na(age)) %>%
mutate(elderly = ifelse(age %in% c("65 and 66 years", "67 to 69 years", "70 to 74 years", "75 to 79 years", "80 to 84 years", "85 years and over"), estimate, NA), `less than 30` = ifelse(age %in% c("Under 5 years", "5 to 9 years", "10 to 14 years", "15 to 17 years", "18 and 19 years", "20 years", "21 years", "22 to 24 years", "25 to 29 years"), estimate, NA)) %>%
group_by(blockgroup) %>%
summarize(elderly = sum(elderly, na.rm = T), `less than 30` = sum(`less than 30`, na.rm = T), total = sum(estimate, na.rm = T)) %>%
mutate(`percent elderly` = elderly*100 / total, `percent less than 30` = `less than 30`*100 / total, `percent nonelderly` = (100 - `percent elderly`)) %>%
left_join(bay_median_income_by_block %>% dplyr::select(-Median_Income)) %>%
filter(!is.na(device_count))
# plotting
bay_age_by_block %>%
ggplot(aes(
x = `percent less than 30`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents younger than 30",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Young Age Groups"
)
young_model <- lm(bay_age_by_block$`% Not Completely at Home` ~ bay_age_by_block$`percent less than 30`)
summary(young_model)
##
## Call:
## lm(formula = bay_age_by_block$`% Not Completely at Home` ~ bay_age_by_block$`percent less than 30`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.460 -5.764 -0.312 5.371 37.944
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.10671 0.46082 106.564 <2e-16
## bay_age_by_block$`percent less than 30` 0.12215 0.01247 9.798 <2e-16
##
## (Intercept) ***
## bay_age_by_block$`percent less than 30` ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.978 on 4734 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.01987, Adjusted R-squared: 0.01967
## F-statistic: 95.99 on 1 and 4734 DF, p-value: < 2.2e-16
bay_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
ggplot(aes(
x = `percent elderly`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents 65 and older",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Elderly Population"
)
elderly_model <- lm(`% Not Completely at Home` ~ `percent elderly`, bay_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent elderly`,
## data = bay_age_by_block %>% filter(`percent elderly` < 50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.586 -5.830 -0.356 5.557 40.088
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 54.24498 0.28023 193.58 < 2e-16 ***
## `percent elderly` -0.05550 0.01618 -3.43 0.000609 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.036 on 4693 degrees of freedom
## Multiple R-squared: 0.002501, Adjusted R-squared: 0.002288
## F-statistic: 11.76 on 1 and 4693 DF, p-value: 0.0006089
Compare to pre-shelter-in-place behavior:
bay_age_by_block %>%
ggplot(aes(
x = `percent less than 30`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents younger than 30",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Young Age Groups Pre Shelter-in-Place"
)
young_model2 <- lm(bay_age_by_block$`% Not Completely at Home Pre Shelter` ~ bay_age_by_block$`percent less than 30`)
summary(young_model2)
##
## Call:
## lm(formula = bay_age_by_block$`% Not Completely at Home Pre Shelter` ~
## bay_age_by_block$`percent less than 30`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.649 -3.332 0.283 3.749 15.296
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 78.773529 0.281877 279.460 < 2e-16
## bay_age_by_block$`percent less than 30` -0.027096 0.007631 -3.551 0.000388
##
## (Intercept) ***
## bay_age_by_block$`percent less than 30` ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.473 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.002657, Adjusted R-squared: 0.002447
## F-statistic: 12.61 on 1 and 4732 DF, p-value: 0.0003878
bay_age_by_block %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
ggplot(aes(
x = `percent elderly`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents 65 and older",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Elderly Population Pre Shelter-in-Place"
)
elderly_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent elderly`, bay_age_by_block %>% filter(`percent elderly` < 50))
summary(elderly_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent elderly`,
## data = bay_age_by_block %>% filter(`percent elderly` < 50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.714 -3.316 0.331 3.681 14.810
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 76.546794 0.167597 456.730 <2e-16 ***
## `percent elderly` 0.085606 0.009675 8.848 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.399 on 4691 degrees of freedom
## (2 observations deleted due to missingness)
## Multiple R-squared: 0.01642, Adjusted R-squared: 0.01621
## F-statistic: 78.29 on 1 and 4691 DF, p-value: < 2.2e-16
# also get data on vehicles available as households without a vehicle
bay_no_vehicles_by_block <- pullCensus("group(B25044)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
select(-variable) %>%
separate(label, into = c(NA, NA, NA,"vehicles"), sep = "!!") %>%
filter(!is.na(vehicles)) %>%
group_by(blockgroup, vehicles) %>%
summarize(grouped_vehicles = sum(estimate)) %>%
spread(key = vehicles, value = grouped_vehicles) %>%
mutate(total_nums = `1 vehicle available` + `2 vehicles available` + `3 vehicles available` + `4 vehicles available` + `5 or more vehicles available` + `No vehicle available`, `percent no vehicles` = `No vehicle available`*100 / total_nums, `percent with vehicles` = (100-`percent no vehicles`)) %>%
left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
bay_no_vehicles_by_block %>%
ggplot(aes(
x = `percent with vehicles`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with vehicles available",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Vehicle Availability"
)
vehicles_model <- lm(`% Not Completely at Home` ~ `percent with vehicles`, bay_no_vehicles_by_block)
summary(vehicles_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent with vehicles`,
## data = bay_no_vehicles_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.539 -5.918 -0.297 5.597 41.035
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 57.35685 0.96724 59.299 < 2e-16 ***
## `percent with vehicles` -0.04318 0.01047 -4.125 3.76e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.003 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.003587, Adjusted R-squared: 0.003376
## F-statistic: 17.02 on 1 and 4728 DF, p-value: 3.763e-05
Compare to pre-shelter-in-place behavior:
bay_no_vehicles_by_block %>%
ggplot(aes(
x = `percent with vehicles`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with vehicles available",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Vehicle Availability Pre Shelter-in-Place"
)
vehicles_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent with vehicles`, bay_no_vehicles_by_block)
summary(vehicles_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent with vehicles`,
## data = bay_no_vehicles_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.3199 -3.2767 0.2145 3.5354 22.1954
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 66.925407 0.564331 118.59 <2e-16 ***
## `percent with vehicles` 0.118945 0.006106 19.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.253 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.07429, Adjusted R-squared: 0.0741
## F-statistic: 379.5 on 1 and 4728 DF, p-value: < 2.2e-16
# get data on occupants per room
bay_occupants_per_room_by_block <- pullCensus("group(B25014)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
select(-variable) %>%
separate(label, into = c(NA, NA, NA,"occupants per room"), sep = "!!") %>%
filter(!is.na(`occupants per room`)) %>%
group_by(blockgroup, `occupants per room`) %>%
summarize(estimate_tot = sum(estimate)) %>%
spread(key = `occupants per room`, value = estimate_tot) %>%
mutate(total_nums = `0.50 or less occupants per room` + `0.51 to 1.00 occupants per room` + `1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`, `percent 1 or more` = (`1.01 to 1.50 occupants per room` + `1.51 to 2.00 occupants per room` + `2.01 or more occupants per room`) * 100/ total_nums, `percent less than 1` = (100-`percent 1 or more`)) %>%
left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
bay_occupants_per_room_by_block %>%
ggplot(aes(
x = `percent less than 1`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with 1 or fewer occupant per room",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Room Occupancy"
)
occupants_model <- lm(`% Not Completely at Home` ~ `percent less than 1`, bay_occupants_per_room_by_block)
summary(occupants_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent less than 1`,
## data = bay_occupants_per_room_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.306 -5.793 -0.317 5.481 41.080
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 61.70399 1.43003 43.149 < 2e-16 ***
## `percent less than 1` -0.08898 0.01526 -5.829 5.95e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.987 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.007135, Adjusted R-squared: 0.006925
## F-statistic: 33.98 on 1 and 4728 DF, p-value: 5.946e-09
Compare to pre-shelter-in-place behavior:
bay_occupants_per_room_by_block %>%
ggplot(aes(
x = `percent less than 1`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with 1 or fewer occupant per room",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Room Occupancy Pre Shelter-in-Place"
)
occupants_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent less than 1`, bay_occupants_per_room_by_block)
summary(occupants_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent less than 1`,
## data = bay_occupants_per_room_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -30.361 -3.161 0.316 3.671 17.116
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 63.306072 0.842479 75.14 <2e-16 ***
## `percent less than 1` 0.155550 0.008993 17.30 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.295 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.05951, Adjusted R-squared: 0.05931
## F-statistic: 299.2 on 1 and 4728 DF, p-value: < 2.2e-16
bay_education_by_block <- pullCensus("group(B15003)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
select(-variable) %>%
separate(label, into = c(NA, NA, "education level"), sep = "!!") %>%
mutate(`education level` = replace_na(`education level`, "total_educ")) %>% # if the education level field is NA, this corresponded to the total number in that blockgroup
spread(key = `education level`, value = estimate) %>%
mutate(`percent associates or higher` = (`Associate's degree` + `Bachelor's degree` + `Doctorate degree` + `Master's degree`)*100/total_educ, `percent less than associates` = 100-`percent associates or higher`) %>%
left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
bay_education_by_block %>%
ggplot(aes(
x = `percent associates or higher`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of people with an degree at Associate's level or higher",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Education"
)
educ_model <- lm(`% Not Completely at Home` ~ `percent associates or higher`, bay_education_by_block)
summary(educ_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent associates or higher`,
## data = bay_education_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.065 -5.606 -0.766 4.826 43.742
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 60.502867 0.338627 178.67 <2e-16 ***
## `percent associates or higher` -0.140169 0.006236 -22.48 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.608 on 4733 degrees of freedom
## (8 observations deleted due to missingness)
## Multiple R-squared: 0.09645, Adjusted R-squared: 0.09626
## F-statistic: 505.2 on 1 and 4733 DF, p-value: < 2.2e-16
Compare to pre-shelter-in-place behavior:
bay_education_by_block %>%
ggplot(aes(
x = `percent associates or higher`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of people with an degree at Associate's level or higher",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and Education Pre Shelter-in-Place"
)
educ_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent associates or higher`, bay_education_by_block)
summary(educ_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent associates or higher`,
## data = bay_education_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.2636 -3.0081 0.4311 3.4893 15.3499
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 73.135228 0.202927 360.40 <2e-16 ***
## `percent associates or higher` 0.092690 0.003737 24.81 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.155 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.1151, Adjusted R-squared: 0.1149
## F-statistic: 615.3 on 1 and 4732 DF, p-value: < 2.2e-16
Motivated by this paper https://www.nber.org/papers/w26982.pdf on social distancing, internet access, and inequality, we look at whether a household has “Broadband (high-speed) Internet service such as cable, fiber optic, or DSL service,” and staying at home.
bay_internet_by_block <- pullCensus("group(B28002)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
select(-variable) %>%
separate(label, into = c(NA, NA, "subscription", "type", "additional"), sep = "!!") %>%
filter(is.na(subscription) | (type == "Broadband such as cable, fiber optic or DSL") & is.na(additional)) %>%
mutate(type = replace_na(type, "total_num")) %>%
dplyr::select(blockgroup, type, estimate) %>%
spread(key = type, value = estimate) %>%
mutate(`percent high speed` = `Broadband such as cable, fiber optic or DSL`*100/total_num, `percent no high speed` = 100-`percent high speed`) %>%
left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# plotting
bay_internet_by_block %>%
ggplot(aes(
x = `percent high speed`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with broadband such as cable, fiber optic or DSL",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and High Speed Internet"
)
internet_model <- lm(`% Not Completely at Home` ~ `percent high speed`, bay_internet_by_block)
summary(internet_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `percent high speed`,
## data = bay_internet_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -28.509 -5.619 -0.439 5.044 44.047
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 69.134985 0.744851 92.82 <2e-16 ***
## `percent high speed` -0.198027 0.009243 -21.43 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.611 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.0885, Adjusted R-squared: 0.08831
## F-statistic: 459.1 on 1 and 4728 DF, p-value: < 2.2e-16
Compare to pre-shelter-in-place behavior:
bay_internet_by_block %>%
ggplot(aes(
x = `percent high speed`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households without broadband such as cable, fiber optic or DSL",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Staying at Home and High Speed Internet Pre Shelter-in-Place"
)
internet_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `percent high speed`, bay_internet_by_block)
summary(internet_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `percent high speed`,
## data = bay_internet_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.3893 -3.1107 0.1585 3.5344 20.8729
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 67.53971 0.44725 151.01 <2e-16 ***
## `percent high speed` 0.12937 0.00555 23.31 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.171 on 4728 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1031, Adjusted R-squared: 0.1029
## F-statistic: 543.4 on 1 and 4728 DF, p-value: < 2.2e-16
bay_race_by_block <- pullCensus("group(B02001)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
select(-variable) %>%
separate(label, into = c(NA, NA, "race", "specification"), sep = "!!") %>%
filter(is.na(specification) & !is.na(race)) %>%
dplyr::select(blockgroup, estimate, race) %>%
spread(key = race, value = estimate) %>%
mutate(total_race = `American Indian and Alaska Native alone` + `Asian alone` + `Black or African American alone` + `Native Hawaiian and Other Pacific Islander alone` + `Some other race alone` + `Two or more races` + `White alone`, `% white` = `White alone`*100/total_race, `% Asian` = `Asian alone`*100/total_race, `% black` = `Black or African American alone`*100/total_race) %>%
left_join(bay_age_by_block %>% dplyr::select_if(!names(.) %in% c("elderly", "percent elderly", "less than 30", "percent less than 30"))) %>%
filter(!is.na(device_count))
# also get ethnicity data (hispanic/latino vs not)
bay_hisplat_by_block <- pullCensus("group(B03002)", bay_area_counties) %>%
select(-c(contains("EA"),contains("MA"),contains("M"))) %>%
gather(key = "variable", value = "estimate", -blockgroup) %>%
mutate(label = acs_vars$label[match(variable,acs_vars$name)]) %>%
select(-variable) %>%
separate(label, into = c(NA, NA, "hisp/lat", "specification"), sep = "!!") %>%
filter(is.na(specification) & !is.na(`hisp/lat`)) %>%
dplyr::select(blockgroup, estimate, `hisp/lat`) %>%
spread(key = `hisp/lat`, value = estimate) %>%
mutate(`% non hispanic/latino` = `Not Hispanic or Latino`*100/(`Hispanic or Latino` + `Not Hispanic or Latino`))
# join with the race data
bay_race_by_block <- bay_race_by_block %>% left_join(bay_hisplat_by_block %>% dplyr::select(blockgroup, `% non hispanic/latino`))
# plotting
# percent white
bay_race_by_block %>%
ggplot(aes(
x = `% white`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are white",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and White Residents"
)
white_model <- lm(`% Not Completely at Home` ~ `% white`, bay_race_by_block)
summary(white_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `% white`, data = bay_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.209 -5.867 -0.325 5.406 40.720
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 49.017281 0.317038 154.61 <2e-16 ***
## `% white` 0.082034 0.005378 15.25 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.853 on 4734 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.04684, Adjusted R-squared: 0.04664
## F-statistic: 232.6 on 1 and 4734 DF, p-value: < 2.2e-16
# percent Asian
bay_race_by_block %>%
ggplot(aes(
x = `% Asian`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are Asian",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Asian Residents"
)
asian_model <- lm(`% Not Completely at Home` ~ `% Asian`, bay_race_by_block)
summary(asian_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `% Asian`, data = bay_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.858 -5.282 -0.568 4.575 43.156
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 58.014237 0.178989 324.12 <2e-16 ***
## `% Asian` -0.193485 0.005685 -34.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.128 on 4734 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.1966, Adjusted R-squared: 0.1964
## F-statistic: 1158 on 1 and 4734 DF, p-value: < 2.2e-16
# percent non hispanic/latino
bay_race_by_block %>%
ggplot(aes(
x = `% non hispanic/latino`,
y = `% Not Completely at Home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are not Hispanic or Latino",
y = "Percent devices leaving home on weekdays since shelter-in-place",
title = "Bay Area: Social Distancing and Hispanic/Latino Residents"
)
hisp_model <- lm(`% Not Completely at Home` ~ `% non hispanic/latino`, bay_race_by_block)
summary(hisp_model)
##
## Call:
## lm(formula = `% Not Completely at Home` ~ `% non hispanic/latino`,
## data = bay_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -27.693 -5.689 -0.546 4.941 41.777
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 62.566651 0.522442 119.76 <2e-16 ***
## `% non hispanic/latino` -0.117579 0.006525 -18.02 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.772 on 4734 degrees of freedom
## (7 observations deleted due to missingness)
## Multiple R-squared: 0.06419, Adjusted R-squared: 0.06399
## F-statistic: 324.7 on 1 and 4734 DF, p-value: < 2.2e-16
Compare to pre-shelter-in-place behavior:
bay_race_by_block %>%
ggplot(aes(
x = `% white`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are white",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Social Distancing and White Residents Pre Shelter-in-Place"
)
white_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% white`, bay_race_by_block)
summary(white_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% white`,
## data = bay_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -31.636 -3.187 0.365 3.610 14.301
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 74.825617 0.190483 392.82 <2e-16 ***
## `% white` 0.055451 0.003231 17.16 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.317 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.05859, Adjusted R-squared: 0.05839
## F-statistic: 294.5 on 1 and 4732 DF, p-value: < 2.2e-16
bay_race_by_block %>%
ggplot(aes(
x = `% Asian`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are Asian",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Social Distancing and Asian Residents Pre Shelter-in-Place"
)
asian_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% Asian`, bay_race_by_block)
summary(asian_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% Asian`,
## data = bay_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.937 -3.425 0.249 3.755 14.494
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 77.600043 0.120661 643.127 <2e-16 ***
## `% Asian` 0.009013 0.003832 2.352 0.0187 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.477 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.001168, Adjusted R-squared: 0.0009568
## F-statistic: 5.533 on 1 and 4732 DF, p-value: 0.0187
bay_race_by_block %>%
ggplot(aes(
x = `% non hispanic/latino`,
y = `% Not Completely at Home Pre Shelter`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are not Hispanic or Latino",
y = "Percent devices leaving home on weekdays pre-shelter-in-place",
title = "Bay Area: Social Distancing and Hispanic/Latino Residents Pre Shelter-in-Place"
)
hisp_model2 <- lm(`% Not Completely at Home Pre Shelter` ~ `% non hispanic/latino`, bay_race_by_block)
summary(hisp_model2)
##
## Call:
## lm(formula = `% Not Completely at Home Pre Shelter` ~ `% non hispanic/latino`,
## data = bay_race_by_block)
##
## Residuals:
## Min 1Q Median 3Q Max
## -33.022 -3.189 0.459 3.605 17.674
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 72.007971 0.314583 228.90 <2e-16 ***
## `% non hispanic/latino` 0.074764 0.003929 19.03 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.282 on 4732 degrees of freedom
## (9 observations deleted due to missingness)
## Multiple R-squared: 0.07109, Adjusted R-squared: 0.07089
## F-statistic: 362.1 on 1 and 4732 DF, p-value: < 2.2e-16
Multiple regression analysis with income, education, and internet
# multiple regression
modeltest <- lm(bay_ami_by_block$`% Not Completely at Home` ~ bay_ami_by_block$`% over 125,000` + bay_education_by_block$`percent associates or higher` + bay_internet_by_block$`percent high speed`)
summary(modeltest)
##
## Call:
## lm(formula = bay_ami_by_block$`% Not Completely at Home` ~ bay_ami_by_block$`% over 125,000` +
## bay_education_by_block$`percent associates or higher` + bay_internet_by_block$`percent high speed`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -24.971 -5.549 -0.589 4.635 45.232
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 64.502846 0.764507
## bay_ami_by_block$`% over 125,000` -0.122709 0.009049
## bay_education_by_block$`percent associates or higher` -0.023364 0.008954
## bay_internet_by_block$`percent high speed` -0.064243 0.011652
## t value Pr(>|t|)
## (Intercept) 84.372 < 2e-16 ***
## bay_ami_by_block$`% over 125,000` -13.561 < 2e-16 ***
## bay_education_by_block$`percent associates or higher` -2.609 0.0091 **
## bay_internet_by_block$`percent high speed` -5.514 3.7e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.319 on 4726 degrees of freedom
## (13 observations deleted due to missingness)
## Multiple R-squared: 0.1497, Adjusted R-squared: 0.1492
## F-statistic: 277.3 on 3 and 4726 DF, p-value: < 2.2e-16
bay_dem_distancing <- bay_internet_by_block %>%
dplyr::select(`percent high speed`, `% Not Completely at Home`, `% Completely at Home`, blockgroup) %>%
left_join(bay_education_by_block %>% dplyr::select(blockgroup, `percent associates or higher`)) %>%
left_join(bay_ami_by_block %>% dplyr::select(blockgroup, `% over 125,000`)) %>%
left_join(bay_ami_by_block %>% dplyr::select(blockgroup, `% over 100,000`)) %>%
left_join(bay_ami_by_block %>% dplyr::select(blockgroup, `% over 75,000`)) %>%
left_join(bay_age_by_block %>% dplyr::select(blockgroup, `percent less than 30`)) %>%
left_join(bay_age_by_block %>% dplyr::select(blockgroup, `percent elderly`)) %>%
left_join(bay_lang_by_block %>% dplyr::select(blockgroup, `% not speaking spanish`)) %>%
left_join(bay_lang_by_block %>% dplyr::select(blockgroup, `% speaking english > well`)) %>%
left_join(bay_no_vehicles_by_block %>% dplyr::select(blockgroup, `percent with vehicles`)) %>%
left_join(bay_occupants_per_room_by_block %>% dplyr::select(blockgroup, `percent less than 1`)) %>%
left_join(bay_race_by_block %>% dplyr::select(blockgroup, `% white`, `% Asian`, `% non hispanic/latino`))
bay_dem_distancing_pre_post <- bay_dem_distancing %>%
left_join(bay_internet_by_block %>% dplyr::select(`% Not Completely at Home Pre Shelter`, `% Completely at Home Pre Shelter`, blockgroup)) %>%
mutate(`% increase in staying completely home` = `% Completely at Home` - `% Completely at Home Pre Shelter`, frac_increase = `% increase in staying completely home`/`% Completely at Home Pre Shelter`)
bay_dem_distancing[is.na(bay_dem_distancing)] <- 0
bay_dem_distancing_pre_post[is.na(bay_dem_distancing_pre_post)] <- 0
saveRDS(bay_dem_distancing_pre_post, "/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/bay_socialdistancing_demdata_prepostdifs_manyvars.rds")
# bay_dem_distancing_pre_post <- readRDS("/Users/simonespeizer/Documents/2020 Spring Quarter/CEE 218Z/covid19/Simone_Speizer/bay_socialdistancing_demdata_prepostdifs_manyvars.rds")
# age
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent less than 30`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents younger than 30",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Young Age Groups"
)
young_model_dif <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`percent less than 30`)
summary(young_model_dif)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`percent less than 30`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -60.100 -6.430 -0.026 6.949 33.015
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 29.02612 0.54494 53.265
## bay_dem_distancing_pre_post$`percent less than 30` -0.13241 0.01475 -8.975
## Pr(>|t|)
## (Intercept) <2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 30` <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.71 on 4741 degrees of freedom
## Multiple R-squared: 0.01671, Adjusted R-squared: 0.0165
## F-statistic: 80.55 on 1 and 4741 DF, p-value: < 2.2e-16
young_model_frac <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`percent less than 30`)
summary(young_model_frac)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`percent less than 30`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2784 -0.4980 -0.1216 0.3772 4.0286
##
## Coefficients:
## Estimate Std. Error t value
## (Intercept) 1.595957 0.037886 42.125
## bay_dem_distancing_pre_post$`percent less than 30` -0.010209 0.001026 -9.953
## Pr(>|t|)
## (Intercept) <2e-16 ***
## bay_dem_distancing_pre_post$`percent less than 30` <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7449 on 4741 degrees of freedom
## Multiple R-squared: 0.02047, Adjusted R-squared: 0.02026
## F-statistic: 99.07 on 1 and 4741 DF, p-value: < 2.2e-16
bay_dem_distancing_pre_post %>% filter(`percent elderly` < 50) %>% # get rid of extreme outliers
ggplot(aes(
x = `percent elderly`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents 65 and older",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Elderly Population"
)
elderly_model_dif <- lm(`% increase in staying completely home` ~ `percent elderly`, bay_dem_distancing_pre_post %>% filter(`percent elderly` < 50))
summary(elderly_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent elderly`,
## data = bay_dem_distancing_pre_post %>% filter(`percent elderly` <
## 50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -62.210 -6.562 -0.043 7.023 33.265
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 22.14817 0.33111 66.892 < 2e-16 ***
## `percent elderly` 0.14894 0.01913 7.784 8.55e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.71 on 4700 degrees of freedom
## Multiple R-squared: 0.01273, Adjusted R-squared: 0.01252
## F-statistic: 60.6 on 1 and 4700 DF, p-value: 8.551e-15
elderly_model_frac <- lm(frac_increase ~ `percent elderly`, bay_dem_distancing_pre_post %>% filter(`percent elderly` < 50))
summary(elderly_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `percent elderly`, data = bay_dem_distancing_pre_post %>%
## filter(`percent elderly` < 50))
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0865 -0.5038 -0.1219 0.3770 4.0081
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.013722 0.022954 44.16 <2e-16 ***
## `percent elderly` 0.014829 0.001326 11.18 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7427 on 4700 degrees of freedom
## Multiple R-squared: 0.02591, Adjusted R-squared: 0.0257
## F-statistic: 125 on 1 and 4700 DF, p-value: < 2.2e-16
# income - less than $75000
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `% over 75,000`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $75,000 (50% AMI) annually",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Households Above 50% AMI"
)
income_75_model_dif <- lm(`% increase in staying completely home` ~ `% over 75,000`, bay_dem_distancing_pre_post)
summary(income_75_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 75,000`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.250 -5.221 0.455 5.961 32.420
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.953550 0.444147 17.91 <2e-16 ***
## `% over 75,000` 0.271059 0.006991 38.77 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.414 on 4741 degrees of freedom
## Multiple R-squared: 0.2408, Adjusted R-squared: 0.2406
## F-statistic: 1503 on 1 and 4741 DF, p-value: < 2.2e-16
income_75_model_frac <- lm(frac_increase ~ `% over 75,000`, bay_dem_distancing_pre_post)
summary(income_75_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% over 75,000`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.2526 -0.3937 -0.0506 0.3249 3.8100
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0393374 0.0304605 1.291 0.197
## `% over 75,000` 0.0197722 0.0004794 41.240 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6457 on 4741 degrees of freedom
## Multiple R-squared: 0.264, Adjusted R-squared: 0.2639
## F-statistic: 1701 on 1 and 4741 DF, p-value: < 2.2e-16
# income - less than $100000
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `% over 100,000`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $100,000 (80% AMI) annually",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Households Below 80% AMI"
)
income_100_model_dif <- lm(`% increase in staying completely home` ~ `% over 100,000`, bay_dem_distancing_pre_post)
summary(income_100_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 100,000`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.646 -4.939 0.565 5.858 29.290
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 11.130681 0.342642 32.48 <2e-16 ***
## `% over 100,000` 0.269835 0.006442 41.89 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.231 on 4741 degrees of freedom
## Multiple R-squared: 0.2701, Adjusted R-squared: 0.2699
## F-statistic: 1754 on 1 and 4741 DF, p-value: < 2.2e-16
income_100_model_frac <- lm(frac_increase ~ `% over 100,000`, bay_dem_distancing_pre_post)
summary(income_100_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% over 100,000`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3549 -0.3751 -0.0358 0.3250 3.7199
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.2605015 0.0233278 11.17 <2e-16 ***
## `% over 100,000` 0.0198993 0.0004386 45.37 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6285 on 4741 degrees of freedom
## Multiple R-squared: 0.3027, Adjusted R-squared: 0.3026
## F-statistic: 2058 on 1 and 4741 DF, p-value: < 2.2e-16
# income - less than $125000
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `% over 125,000`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with incomes over $125,000 annually",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Households Below $125,000"
)
income_125_model_dif <- lm(`% increase in staying completely home` ~ `% over 125,000`, bay_dem_distancing_pre_post)
summary(income_125_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% over 125,000`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.492 -4.758 0.654 5.927 27.999
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 13.351784 0.284488 46.93 <2e-16 ***
## `% over 125,000` 0.280700 0.006432 43.64 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.126 on 4741 degrees of freedom
## Multiple R-squared: 0.2866, Adjusted R-squared: 0.2865
## F-statistic: 1905 on 1 and 4741 DF, p-value: < 2.2e-16
income_125_model_frac <- lm(frac_increase ~ `% over 125,000`, bay_dem_distancing_pre_post)
summary(income_125_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% over 125,000`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5762 -0.3554 -0.0266 0.3203 3.6897
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4145126 0.0192182 21.57 <2e-16 ***
## `% over 125,000` 0.0209505 0.0004345 48.22 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6165 on 4741 degrees of freedom
## Multiple R-squared: 0.3291, Adjusted R-squared: 0.3289
## F-statistic: 2325 on 1 and 4741 DF, p-value: < 2.2e-16
# language
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `% speaking english > well`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals speaking English well",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and English Language Ability"
)
english_ability_model_dif <- lm(`% increase in staying completely home` ~ `% speaking english > well`, bay_dem_distancing_pre_post)
summary(english_ability_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% speaking english > well`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -59.049 -6.490 0.120 6.887 32.881
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.05823 1.55805 3.888 0.000102 ***
## `% speaking english > well` 0.19831 0.01682 11.791 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.65 on 4741 degrees of freedom
## Multiple R-squared: 0.02849, Adjusted R-squared: 0.02829
## F-statistic: 139 on 1 and 4741 DF, p-value: < 2.2e-16
english_ability_model_frac <- lm(frac_increase ~ `% speaking english > well`, bay_dem_distancing_pre_post)
summary(english_ability_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% speaking english > well`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1691 -0.4819 -0.0973 0.3788 3.9882
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.570920 0.106910 -5.34 9.72e-08 ***
## `% speaking english > well` 0.019586 0.001154 16.97 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7307 on 4741 degrees of freedom
## Multiple R-squared: 0.05728, Adjusted R-squared: 0.05708
## F-statistic: 288 on 1 and 4741 DF, p-value: < 2.2e-16
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `% not speaking spanish`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of individuals not speaking Spanish",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Spanish Language Ability"
)
spanish_speaking_model_dif <- lm(`% increase in staying completely home` ~ `% not speaking spanish`, bay_dem_distancing_pre_post)
summary(spanish_speaking_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% not speaking spanish`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.610 -5.838 0.766 6.640 29.535
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.043840 0.711325 9.902 <2e-16 ***
## `% not speaking spanish` 0.206215 0.008297 24.855 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.16 on 4741 degrees of freedom
## Multiple R-squared: 0.1153, Adjusted R-squared: 0.1151
## F-statistic: 617.8 on 1 and 4741 DF, p-value: < 2.2e-16
spanish_speaking_model_frac <- lm(frac_increase ~ `% not speaking spanish`, bay_dem_distancing_pre_post)
summary(spanish_speaking_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% not speaking spanish`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1477 -0.4496 -0.0563 0.3612 3.8885
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0577044 0.0490605 -1.176 0.24
## `% not speaking spanish` 0.0154081 0.0005722 26.926 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7009 on 4741 degrees of freedom
## Multiple R-squared: 0.1326, Adjusted R-squared: 0.1325
## F-statistic: 725 on 1 and 4741 DF, p-value: < 2.2e-16
# occupants per room
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent less than 1`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with 1 or fewer occupant per room",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Room Occupancy"
)
occupants_model_dif <- lm(`% increase in staying completely home` ~ `percent less than 1`, bay_dem_distancing_pre_post)
summary(occupants_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent less than 1`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.126 -6.426 0.247 6.874 32.852
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.4739 1.4499 0.327 0.744
## `percent less than 1` 0.2565 0.0155 16.551 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.51 on 4741 degrees of freedom
## Multiple R-squared: 0.05463, Adjusted R-squared: 0.05443
## F-statistic: 274 on 1 and 4741 DF, p-value: < 2.2e-16
occupants_model_frac <- lm(frac_increase ~ `percent less than 1`, bay_dem_distancing_pre_post)
summary(occupants_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `percent less than 1`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1174 -0.4880 -0.0965 0.3791 3.9044
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.701849 0.099951 -7.022 2.5e-12 ***
## `percent less than 1` 0.020814 0.001068 19.482 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7242 on 4741 degrees of freedom
## Multiple R-squared: 0.07412, Adjusted R-squared: 0.07393
## F-statistic: 379.5 on 1 and 4741 DF, p-value: < 2.2e-16
# vehicles - percent with no vehicles
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent with vehicles`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of housholds with vehicles available",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Vehicle Availability"
)
vehicles_model_dif <- lm(`% increase in staying completely home` ~ `percent with vehicles`, bay_dem_distancing_pre_post)
summary(vehicles_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent with vehicles`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63.898 -6.598 0.045 7.086 31.302
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.93480 1.05527 7.519 6.55e-14 ***
## `percent with vehicles` 0.17964 0.01143 15.711 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.53 on 4741 degrees of freedom
## Multiple R-squared: 0.04949, Adjusted R-squared: 0.04929
## F-statistic: 246.8 on 1 and 4741 DF, p-value: < 2.2e-16
vehicles_model_frac <- lm(frac_increase ~ `percent with vehicles`, bay_dem_distancing_pre_post)
summary(vehicles_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `percent with vehicles`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.0978 -0.4998 -0.1115 0.3742 3.9950
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.0837869 0.0728712 -1.15 0.25
## `percent with vehicles` 0.0144368 0.0007896 18.29 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7274 on 4741 degrees of freedom
## Multiple R-squared: 0.06587, Adjusted R-squared: 0.06568
## F-statistic: 334.3 on 1 and 4741 DF, p-value: < 2.2e-16
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent associates or higher`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of people with an degree at Associate's level or higher",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Education"
)
educ_model_dif <- lm(`% increase in staying completely home` ~ `percent associates or higher`, bay_dem_distancing_pre_post)
summary(educ_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent associates or higher`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.324 -5.095 0.804 6.297 26.237
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 12.456294 0.379359 32.84 <2e-16 ***
## `percent associates or higher` 0.235877 0.006992 33.74 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.703 on 4741 degrees of freedom
## Multiple R-squared: 0.1936, Adjusted R-squared: 0.1934
## F-statistic: 1138 on 1 and 4741 DF, p-value: < 2.2e-16
educ_model_frac <- lm(frac_increase ~ `percent associates or higher`, bay_dem_distancing_pre_post)
summary(educ_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `percent associates or higher`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3485 -0.3817 -0.0352 0.3455 3.5561
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.3380681 0.0258703 13.07 <2e-16 ***
## `percent associates or higher` 0.0177958 0.0004768 37.32 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6617 on 4741 degrees of freedom
## Multiple R-squared: 0.2271, Adjusted R-squared: 0.2269
## F-statistic: 1393 on 1 and 4741 DF, p-value: < 2.2e-16
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `percent high speed`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of households with broadband such as cable, fiber optic or DSL",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and High Speed Internet"
)
internet_model_dif <- lm(`% increase in staying completely home` ~ `percent high speed`, bay_dem_distancing_pre_post)
summary(internet_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `percent high speed`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -62.594 -5.668 0.360 6.336 36.721
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.7157 0.8052 -2.131 0.0331 *
## `percent high speed` 0.3289 0.0100 32.872 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.75 on 4741 degrees of freedom
## Multiple R-squared: 0.1856, Adjusted R-squared: 0.1854
## F-statistic: 1081 on 1 and 4741 DF, p-value: < 2.2e-16
internet_model_frac <- lm(frac_increase ~ `percent high speed`, bay_dem_distancing_pre_post)
summary(internet_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `percent high speed`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3879 -0.4325 -0.0922 0.3387 3.7733
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -0.5262907 0.0564584 -9.322 <2e-16 ***
## `percent high speed` 0.0222260 0.0007015 31.682 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6837 on 4741 degrees of freedom
## Multiple R-squared: 0.1747, Adjusted R-squared: 0.1746
## F-statistic: 1004 on 1 and 4741 DF, p-value: < 2.2e-16
# white
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `% white`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are white",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and White Residents"
)
white_model_dif <- lm(`% increase in staying completely home` ~ `% white`, bay_dem_distancing_pre_post)
summary(white_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% white`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -61.389 -6.847 -0.048 7.117 32.003
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 25.556773 0.384698 66.433 < 2e-16 ***
## `% white` -0.022638 0.006531 -3.466 0.000533 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.79 on 4741 degrees of freedom
## Multiple R-squared: 0.002528, Adjusted R-squared: 0.002317
## F-statistic: 12.01 on 1 and 4741 DF, p-value: 0.0005325
white_model_frac <- lm(frac_increase ~ `% white`, bay_dem_distancing_pre_post)
summary(white_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% white`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1100 -0.5091 -0.1160 0.3948 4.1101
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0602360 0.0266874 39.728 < 2e-16 ***
## `% white` 0.0032404 0.0004531 7.152 9.85e-13 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7486 on 4741 degrees of freedom
## Multiple R-squared: 0.01067, Adjusted R-squared: 0.01047
## F-statistic: 51.15 on 1 and 4741 DF, p-value: 9.85e-13
# asian
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `% Asian`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are Asian",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Asian Residents"
)
asian_model_dif <- lm(`% increase in staying completely home` ~ `% Asian`, bay_dem_distancing_pre_post)
summary(asian_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% Asian`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -57.551 -5.998 0.063 6.561 31.500
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 19.516119 0.218434 89.34 <2e-16 ***
## `% Asian` 0.204167 0.006943 29.41 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.936 on 4741 degrees of freedom
## Multiple R-squared: 0.1543, Adjusted R-squared: 0.1541
## F-statistic: 864.8 on 1 and 4741 DF, p-value: < 2.2e-16
asian_model_frac <- lm(frac_increase ~ `% Asian`, bay_dem_distancing_pre_post)
summary(asian_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% Asian`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.3227 -0.4851 -0.1477 0.3732 4.1343
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 1.0174405 0.0160032 63.58 <2e-16 ***
## `% Asian` 0.0091914 0.0005087 18.07 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.728 on 4741 degrees of freedom
## Multiple R-squared: 0.06443, Adjusted R-squared: 0.06424
## F-statistic: 326.5 on 1 and 4741 DF, p-value: < 2.2e-16
# hispanic/latino
bay_dem_distancing_pre_post %>%
ggplot(aes(
x = `% non hispanic/latino`,
y = `% increase in staying completely home`
)) + geom_point() +
geom_smooth(method=lm) +
labs(
x = "Percent of residents that are not Hispanic or Latino",
y = "Dif in % completely at home after shelter-in-place relative to before",
title = "Bay Area: Social Distancing and Hispanic/Latino Residents"
)
hisp_model_dif <- lm(`% increase in staying completely home` ~ `% non hispanic/latino`, bay_dem_distancing_pre_post)
summary(hisp_model_dif)
##
## Call:
## lm(formula = `% increase in staying completely home` ~ `% non hispanic/latino`,
## data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -65.159 -5.758 0.740 6.654 28.741
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 9.146918 0.593446 15.41 <2e-16 ***
## `% non hispanic/latino` 0.195944 0.007417 26.42 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 10.09 on 4741 degrees of freedom
## Multiple R-squared: 0.1283, Adjusted R-squared: 0.1281
## F-statistic: 697.9 on 1 and 4741 DF, p-value: < 2.2e-16
hisp_model_frac <- lm(frac_increase ~ `% non hispanic/latino`, bay_dem_distancing_pre_post)
summary(hisp_model_frac)
##
## Call:
## lm(formula = frac_increase ~ `% non hispanic/latino`, data = bay_dem_distancing_pre_post)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.1843 -0.4399 -0.0552 0.3701 3.8168
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 0.0930276 0.0408371 2.278 0.0228 *
## `% non hispanic/latino` 0.0147233 0.0005104 28.846 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6942 on 4741 degrees of freedom
## Multiple R-squared: 0.1493, Adjusted R-squared: 0.1491
## F-statistic: 832.1 on 1 and 4741 DF, p-value: < 2.2e-16
difs_model_inc_span <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish`)
summary(difs_model_inc_span)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.599 -4.764 0.713 5.893 26.968
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 10.504466 0.645182
## bay_dem_distancing_pre_post$`% over 125,000` 0.260491 0.007621
## bay_dem_distancing_pre_post$`% not speaking spanish` 0.043381 0.008828
## t value Pr(>|t|)
## (Intercept) 16.281 < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000` 34.182 < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish` 4.914 9.22e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.104 on 4740 degrees of freedom
## Multiple R-squared: 0.2902, Adjusted R-squared: 0.2899
## F-statistic: 969.1 on 2 and 4740 DF, p-value: < 2.2e-16
frac_model_inc_span <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish`)
summary(frac_model_inc_span)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% not speaking spanish`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5031 -0.3574 -0.0205 0.3212 3.6630
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.2004385 0.0435578
## bay_dem_distancing_pre_post$`% over 125,000` 0.0194311 0.0005145
## bay_dem_distancing_pre_post$`% not speaking spanish` 0.0032616 0.0005960
## t value Pr(>|t|)
## (Intercept) 4.602 4.30e-06 ***
## bay_dem_distancing_pre_post$`% over 125,000` 37.767 < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish` 5.473 4.66e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6146 on 4740 degrees of freedom
## Multiple R-squared: 0.3333, Adjusted R-squared: 0.333
## F-statistic: 1185 on 2 and 4740 DF, p-value: < 2.2e-16
difs_model_inc_span_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_span_educ)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish` +
## bay_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.941 -4.669 0.849 5.911 25.738
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 10.974364 0.652670
## bay_dem_distancing_pre_post$`% over 125,000` 0.237072 0.009274
## bay_dem_distancing_pre_post$`% not speaking spanish` 0.019177 0.010378
## bay_dem_distancing_pre_post$`percent associates or higher` 0.049164 0.011139
## t value Pr(>|t|)
## (Intercept) 16.815 < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000` 25.564 < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish` 1.848 0.0647 .
## bay_dem_distancing_pre_post$`percent associates or higher` 4.414 1.04e-05 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.086 on 4739 degrees of freedom
## Multiple R-squared: 0.2931, Adjusted R-squared: 0.2927
## F-statistic: 655.1 on 3 and 4739 DF, p-value: < 2.2e-16
frac_model_inc_span_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_span_educ)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5284 -0.3470 -0.0176 0.3248 3.6048
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.2406074 0.0440086
## bay_dem_distancing_pre_post$`% over 125,000` 0.0174292 0.0006253
## bay_dem_distancing_pre_post$`% not speaking spanish` 0.0011925 0.0006998
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0042028 0.0007511
## t value Pr(>|t|)
## (Intercept) 5.467 4.80e-08 ***
## bay_dem_distancing_pre_post$`% over 125,000` 27.873 < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish` 1.704 0.0884 .
## bay_dem_distancing_pre_post$`percent associates or higher` 5.596 2.32e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6126 on 4739 degrees of freedom
## Multiple R-squared: 0.3376, Adjusted R-squared: 0.3372
## F-statistic: 805.3 on 3 and 4739 DF, p-value: < 2.2e-16
The effect of Spanish language speaking vanishes when accounting for both education and income.
difs_model_inc_eng_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_eng_educ)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` +
## bay_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -71.556 -4.375 0.911 5.758 25.075
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 24.885888 1.402015
## bay_dem_distancing_pre_post$`% over 125,000` 0.246048 0.009201
## bay_dem_distancing_pre_post$`% speaking english > well` -0.162318 0.017074
## bay_dem_distancing_pre_post$`percent associates or higher` 0.094982 0.010066
## t value Pr(>|t|)
## (Intercept) 17.750 <2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000` 26.742 <2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well` -9.507 <2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 9.436 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.004 on 4739 degrees of freedom
## Multiple R-squared: 0.3059, Adjusted R-squared: 0.3054
## F-statistic: 696.1 on 3 and 4739 DF, p-value: < 2.2e-16
frac_model_inc_eng_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_eng_educ)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% speaking english > well` +
## bay_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5749 -0.3448 -0.0136 0.3186 3.6055
##
## Coefficients:
## Estimate
## (Intercept) 0.7406258
## bay_dem_distancing_pre_post$`% over 125,000` 0.0177690
## bay_dem_distancing_pre_post$`% speaking english > well` -0.0055003
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0060631
## Std. Error t value
## (Intercept) 0.0952022 7.780
## bay_dem_distancing_pre_post$`% over 125,000` 0.0006248 28.440
## bay_dem_distancing_pre_post$`% speaking english > well` 0.0011594 -4.744
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0006835 8.870
## Pr(>|t|)
## (Intercept) 8.87e-15 ***
## bay_dem_distancing_pre_post$`% over 125,000` < 2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well` 2.16e-06 ***
## bay_dem_distancing_pre_post$`percent associates or higher` < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6114 on 4739 degrees of freedom
## Multiple R-squared: 0.3404, Adjusted R-squared: 0.34
## F-statistic: 815.1 on 3 and 4739 DF, p-value: < 2.2e-16
difs_model_lots <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(difs_model_lots)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` +
## bay_dem_distancing_pre_post$`percent associates or higher` +
## bay_dem_distancing_pre_post$`% not speaking spanish` +
## bay_dem_distancing_pre_post$`percent with vehicles`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -74.596 -4.373 0.784 5.597 25.163
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 18.551493 1.472279
## bay_dem_distancing_pre_post$`% over 125,000` 0.206936 0.009617
## bay_dem_distancing_pre_post$`% speaking english > well` -0.270913 0.019019
## bay_dem_distancing_pre_post$`percent associates or higher` 0.096720 0.011274
## bay_dem_distancing_pre_post$`% not speaking spanish` 0.075109 0.010810
## bay_dem_distancing_pre_post$`percent with vehicles` 0.125811 0.010994
## t value Pr(>|t|)
## (Intercept) 12.601 < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000` 21.517 < 2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well` -14.244 < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 8.579 < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish` 6.948 4.2e-12 ***
## bay_dem_distancing_pre_post$`percent with vehicles` 11.444 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.858 on 4737 degrees of freedom
## Multiple R-squared: 0.3284, Adjusted R-squared: 0.3277
## F-statistic: 463.3 on 5 and 4737 DF, p-value: < 2.2e-16
frac_model_lots <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% speaking english > well` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(frac_model_lots)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% speaking english > well` +
## bay_dem_distancing_pre_post$`percent associates or higher` +
## bay_dem_distancing_pre_post$`% not speaking spanish` + bay_dem_distancing_pre_post$`percent with vehicles`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.7213 -0.3459 -0.0212 0.3139 3.5418
##
## Coefficients:
## Estimate
## (Intercept) 0.3019753
## bay_dem_distancing_pre_post$`% over 125,000` 0.0150253
## bay_dem_distancing_pre_post$`% speaking english > well` -0.0124690
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0068811
## bay_dem_distancing_pre_post$`% not speaking spanish` 0.0038535
## bay_dem_distancing_pre_post$`percent with vehicles` 0.0090237
## Std. Error t value
## (Intercept) 0.0999814 3.020
## bay_dem_distancing_pre_post$`% over 125,000` 0.0006531 23.006
## bay_dem_distancing_pre_post$`% speaking english > well` 0.0012916 -9.654
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0007656 8.987
## bay_dem_distancing_pre_post$`% not speaking spanish` 0.0007341 5.249
## bay_dem_distancing_pre_post$`percent with vehicles` 0.0007466 12.087
## Pr(>|t|)
## (Intercept) 0.00254 **
## bay_dem_distancing_pre_post$`% over 125,000` < 2e-16 ***
## bay_dem_distancing_pre_post$`% speaking english > well` < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` < 2e-16 ***
## bay_dem_distancing_pre_post$`% not speaking spanish` 1.59e-07 ***
## bay_dem_distancing_pre_post$`percent with vehicles` < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6016 on 4737 degrees of freedom
## Multiple R-squared: 0.3617, Adjusted R-squared: 0.361
## F-statistic: 536.8 on 5 and 4737 DF, p-value: < 2.2e-16
difs_model_inc_hisp_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% non hispanic/latino` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_hisp_educ)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% non hispanic/latino` +
## bay_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -67.957 -4.661 0.802 5.870 25.842
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 10.804737 0.539092
## bay_dem_distancing_pre_post$`% over 125,000` 0.236815 0.009258
## bay_dem_distancing_pre_post$`% non hispanic/latino` 0.028591 0.009812
## bay_dem_distancing_pre_post$`percent associates or higher` 0.040655 0.011558
## t value Pr(>|t|)
## (Intercept) 20.042 < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000` 25.578 < 2e-16 ***
## bay_dem_distancing_pre_post$`% non hispanic/latino` 2.914 0.00359 **
## bay_dem_distancing_pre_post$`percent associates or higher` 3.517 0.00044 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.081 on 4739 degrees of freedom
## Multiple R-squared: 0.2939, Adjusted R-squared: 0.2934
## F-statistic: 657.5 on 3 and 4739 DF, p-value: < 2.2e-16
frac_model_inc_hisp_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% non hispanic/latino` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_hisp_educ)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% non hispanic/latino` + bay_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5111 -0.3460 -0.0157 0.3236 3.6078
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.2185664 0.0363438
## bay_dem_distancing_pre_post$`% over 125,000` 0.0173985 0.0006242
## bay_dem_distancing_pre_post$`% non hispanic/latino` 0.0020562 0.0006615
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0034850 0.0007792
## t value Pr(>|t|)
## (Intercept) 6.014 1.95e-09 ***
## bay_dem_distancing_pre_post$`% over 125,000` 27.875 < 2e-16 ***
## bay_dem_distancing_pre_post$`% non hispanic/latino` 3.108 0.00189 **
## bay_dem_distancing_pre_post$`percent associates or higher` 4.472 7.92e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6122 on 4739 degrees of freedom
## Multiple R-squared: 0.3386, Adjusted R-squared: 0.3382
## F-statistic: 808.7 on 3 and 4739 DF, p-value: < 2.2e-16
difs_model_inc_white_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% white` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_white_educ)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% white` +
## bay_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.577 -4.346 0.865 5.637 25.566
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 15.481337 0.395815
## bay_dem_distancing_pre_post$`% over 125,000` 0.244686 0.008961
## bay_dem_distancing_pre_post$`% white` -0.100580 0.005614
## bay_dem_distancing_pre_post$`percent associates or higher` 0.093119 0.009340
## t value Pr(>|t|)
## (Intercept) 39.11 <2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000` 27.31 <2e-16 ***
## bay_dem_distancing_pre_post$`% white` -17.91 <2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 9.97 <2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.796 on 4739 degrees of freedom
## Multiple R-squared: 0.3375, Adjusted R-squared: 0.3371
## F-statistic: 804.7 on 3 and 4739 DF, p-value: < 2.2e-16
frac_model_inc_white_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% white` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_white_educ)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% white` + bay_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5993 -0.3464 -0.0221 0.3214 3.5394
##
## Coefficients:
## Estimate
## (Intercept) 0.3755433
## bay_dem_distancing_pre_post$`% over 125,000` 0.0176384
## bay_dem_distancing_pre_post$`% white` -0.0020731
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0055609
## Std. Error t value
## (Intercept) 0.0274949 13.659
## bay_dem_distancing_pre_post$`% over 125,000` 0.0006225 28.337
## bay_dem_distancing_pre_post$`% white` 0.0003900 -5.316
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0006488 8.572
## Pr(>|t|)
## (Intercept) < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000` < 2e-16 ***
## bay_dem_distancing_pre_post$`% white` 1.11e-07 ***
## bay_dem_distancing_pre_post$`percent associates or higher` < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.611 on 4739 degrees of freedom
## Multiple R-squared: 0.3412, Adjusted R-squared: 0.3408
## F-statistic: 818 on 3 and 4739 DF, p-value: < 2.2e-16
difs_model_inc_asian_educ <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(difs_model_inc_asian_educ)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` +
## bay_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63.773 -4.319 0.846 5.470 25.900
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 10.171629 0.344844
## bay_dem_distancing_pre_post$`% over 125,000` 0.216081 0.008794
## bay_dem_distancing_pre_post$`% Asian` 0.146978 0.006176
## bay_dem_distancing_pre_post$`percent associates or higher` 0.044416 0.008965
## t value Pr(>|t|)
## (Intercept) 29.496 < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000` 24.571 < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian` 23.798 < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 4.954 7.51e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.59 on 4739 degrees of freedom
## Multiple R-squared: 0.3681, Adjusted R-squared: 0.3677
## F-statistic: 920.4 on 3 and 4739 DF, p-value: < 2.2e-16
frac_model_inc_asian_educ <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher`)
summary(frac_model_inc_asian_educ)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5673 -0.3471 -0.0238 0.3108 3.4445
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.2473638 0.0243231
## bay_dem_distancing_pre_post$`% over 125,000` 0.0168190 0.0006203
## bay_dem_distancing_pre_post$`% Asian` 0.0045477 0.0004356
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0043957 0.0006323
## t value Pr(>|t|)
## (Intercept) 10.170 < 2e-16 ***
## bay_dem_distancing_pre_post$`% over 125,000` 27.115 < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian` 10.439 < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 6.951 4.11e-12 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6059 on 4739 degrees of freedom
## Multiple R-squared: 0.3521, Adjusted R-squared: 0.3517
## F-statistic: 858.6 on 3 and 4739 DF, p-value: < 2.2e-16
difs_model_inc_asian_educ_eng <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% speaking english > well`)
summary(difs_model_inc_asian_educ_eng)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` +
## bay_dem_distancing_pre_post$`percent associates or higher` +
## bay_dem_distancing_pre_post$`% speaking english > well`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63.380 -4.339 0.806 5.473 25.909
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 8.980148 1.526570
## bay_dem_distancing_pre_post$`% over 125,000` 0.215010 0.008895
## bay_dem_distancing_pre_post$`% Asian` 0.149463 0.006911
## bay_dem_distancing_pre_post$`percent associates or higher` 0.041008 0.009923
## bay_dem_distancing_pre_post$`% speaking english > well` 0.014606 0.018230
## t value Pr(>|t|)
## (Intercept) 5.883 4.32e-09 ***
## bay_dem_distancing_pre_post$`% over 125,000` 24.171 < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian` 21.625 < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 4.132 3.65e-05 ***
## bay_dem_distancing_pre_post$`% speaking english > well` 0.801 0.423
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.591 on 4738 degrees of freedom
## Multiple R-squared: 0.3682, Adjusted R-squared: 0.3677
## F-statistic: 690.4 on 4 and 4738 DF, p-value: < 2.2e-16
frac_model_inc_asian_educ_eng <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`% speaking english > well`)
summary(frac_model_inc_asian_educ_eng)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` +
## bay_dem_distancing_pre_post$`% speaking english > well`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5678 -0.3470 -0.0234 0.3107 3.4454
##
## Coefficients:
## Estimate
## (Intercept) 0.2593220
## bay_dem_distancing_pre_post$`% over 125,000` 0.0168298
## bay_dem_distancing_pre_post$`% Asian` 0.0045227
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0044299
## bay_dem_distancing_pre_post$`% speaking english > well` -0.0001466
## Std. Error t value
## (Intercept) 0.1076818 2.408
## bay_dem_distancing_pre_post$`% over 125,000` 0.0006275 26.822
## bay_dem_distancing_pre_post$`% Asian` 0.0004875 9.277
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0007000 6.329
## bay_dem_distancing_pre_post$`% speaking english > well` 0.0012859 -0.114
## Pr(>|t|)
## (Intercept) 0.0161 *
## bay_dem_distancing_pre_post$`% over 125,000` < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian` < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 2.7e-10 ***
## bay_dem_distancing_pre_post$`% speaking english > well` 0.9092
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.606 on 4738 degrees of freedom
## Multiple R-squared: 0.3521, Adjusted R-squared: 0.3516
## F-statistic: 643.8 on 4 and 4738 DF, p-value: < 2.2e-16
difs_model_inc_asian_educ_internet <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent high speed`)
summary(difs_model_inc_asian_educ_internet)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` +
## bay_dem_distancing_pre_post$`percent associates or higher` +
## bay_dem_distancing_pre_post$`percent high speed`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63.113 -4.243 0.765 5.392 25.016
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 4.920983 0.744977
## bay_dem_distancing_pre_post$`% over 125,000` 0.191355 0.009276
## bay_dem_distancing_pre_post$`% Asian` 0.142700 0.006160
## bay_dem_distancing_pre_post$`percent associates or higher` 0.026552 0.009187
## bay_dem_distancing_pre_post$`percent high speed` 0.091126 0.011481
## t value Pr(>|t|)
## (Intercept) 6.606 4.40e-11 ***
## bay_dem_distancing_pre_post$`% over 125,000` 20.629 < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian` 23.167 < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 2.890 0.00387 **
## bay_dem_distancing_pre_post$`percent high speed` 7.937 2.56e-15 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.535 on 4738 degrees of freedom
## Multiple R-squared: 0.3764, Adjusted R-squared: 0.3759
## F-statistic: 715.1 on 4 and 4738 DF, p-value: < 2.2e-16
frac_model_inc_asian_educ_internet <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent high speed`)
summary(frac_model_inc_asian_educ_internet)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` +
## bay_dem_distancing_pre_post$`percent high speed`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.4813 -0.3503 -0.0241 0.3029 3.4331
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.0320957 0.0527768
## bay_dem_distancing_pre_post$`% over 125,000` 0.0158053 0.0006571
## bay_dem_distancing_pre_post$`% Asian` 0.0043723 0.0004364
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0036633 0.0006508
## bay_dem_distancing_pre_post$`percent high speed` 0.0037360 0.0008133
## t value Pr(>|t|)
## (Intercept) 0.608 0.543
## bay_dem_distancing_pre_post$`% over 125,000` 24.052 < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian` 10.019 < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 5.629 1.92e-08 ***
## bay_dem_distancing_pre_post$`percent high speed` 4.593 4.47e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6046 on 4738 degrees of freedom
## Multiple R-squared: 0.355, Adjusted R-squared: 0.3545
## F-statistic: 652 on 4 and 4738 DF, p-value: < 2.2e-16
difs_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(difs_model_inc_asian_educ_vehicle)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` +
## bay_dem_distancing_pre_post$`percent associates or higher` +
## bay_dem_distancing_pre_post$`percent with vehicles`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -64.211 -4.317 0.752 5.442 26.869
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 1.966897 0.937917
## bay_dem_distancing_pre_post$`% over 125,000` 0.186136 0.009279
## bay_dem_distancing_pre_post$`% Asian` 0.152654 0.006150
## bay_dem_distancing_pre_post$`percent associates or higher` 0.059307 0.009024
## bay_dem_distancing_pre_post$`percent with vehicles` 0.093000 0.009901
## t value Pr(>|t|)
## (Intercept) 2.097 0.036 *
## bay_dem_distancing_pre_post$`% over 125,000` 20.060 < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian` 24.823 < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 6.572 5.5e-11 ***
## bay_dem_distancing_pre_post$`percent with vehicles` 9.393 < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.512 on 4738 degrees of freedom
## Multiple R-squared: 0.3797, Adjusted R-squared: 0.3792
## F-statistic: 725 on 4 and 4738 DF, p-value: < 2.2e-16
frac_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles`)
summary(frac_model_inc_asian_educ_vehicle)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` +
## bay_dem_distancing_pre_post$`percent with vehicles`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5398 -0.3487 -0.0304 0.3030 3.4025
##
## Coefficients:
## Estimate
## (Intercept) -0.3860752
## bay_dem_distancing_pre_post$`% over 125,000` 0.0145071
## bay_dem_distancing_pre_post$`% Asian` 0.0049859
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0055453
## bay_dem_distancing_pre_post$`percent with vehicles` 0.0071800
## Std. Error t value
## (Intercept) 0.0660327 -5.847
## bay_dem_distancing_pre_post$`% over 125,000` 0.0006533 22.207
## bay_dem_distancing_pre_post$`% Asian` 0.0004330 11.516
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0006353 8.728
## bay_dem_distancing_pre_post$`percent with vehicles` 0.0006970 10.301
## Pr(>|t|)
## (Intercept) 5.35e-09 ***
## bay_dem_distancing_pre_post$`% over 125,000` < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian` < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` < 2e-16 ***
## bay_dem_distancing_pre_post$`percent with vehicles` < 2e-16 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5993 on 4738 degrees of freedom
## Multiple R-squared: 0.3663, Adjusted R-squared: 0.3658
## F-statistic: 684.8 on 4 and 4738 DF, p-value: < 2.2e-16
difs_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$`% increase in staying completely home` ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles` + bay_dem_distancing_pre_post$`percent high speed`)
summary(difs_model_inc_asian_educ_vehicle)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$`% increase in staying completely home` ~
## bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` +
## bay_dem_distancing_pre_post$`percent associates or higher` +
## bay_dem_distancing_pre_post$`percent with vehicles` +
## bay_dem_distancing_pre_post$`percent high speed`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -63.700 -4.286 0.735 5.447 27.725
##
## Coefficients:
## Estimate Std. Error
## (Intercept) 0.374573 0.996725
## bay_dem_distancing_pre_post$`% over 125,000` 0.176801 0.009475
## bay_dem_distancing_pre_post$`% Asian` 0.148745 0.006194
## bay_dem_distancing_pre_post$`percent associates or higher` 0.044844 0.009528
## bay_dem_distancing_pre_post$`percent with vehicles` 0.073349 0.010747
## bay_dem_distancing_pre_post$`percent high speed` 0.057724 0.012430
## t value Pr(>|t|)
## (Intercept) 0.376 0.707
## bay_dem_distancing_pre_post$`% over 125,000` 18.660 < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian` 24.014 < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 4.707 2.59e-06 ***
## bay_dem_distancing_pre_post$`percent with vehicles` 6.825 9.91e-12 ***
## bay_dem_distancing_pre_post$`percent high speed` 4.644 3.51e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 8.494 on 4737 degrees of freedom
## Multiple R-squared: 0.3825, Adjusted R-squared: 0.3819
## F-statistic: 586.9 on 5 and 4737 DF, p-value: < 2.2e-16
frac_model_inc_asian_educ_vehicle <- lm(bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` + bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` + bay_dem_distancing_pre_post$`percent with vehicles` + bay_dem_distancing_pre_post$`percent high speed`)
summary(frac_model_inc_asian_educ_vehicle)
##
## Call:
## lm(formula = bay_dem_distancing_pre_post$frac_increase ~ bay_dem_distancing_pre_post$`% over 125,000` +
## bay_dem_distancing_pre_post$`% Asian` + bay_dem_distancing_pre_post$`percent associates or higher` +
## bay_dem_distancing_pre_post$`percent with vehicles` + bay_dem_distancing_pre_post$`percent high speed`)
##
## Residuals:
## Min 1Q Median 3Q Max
## -2.5278 -0.3496 -0.0302 0.3040 3.4030
##
## Coefficients:
## Estimate
## (Intercept) -0.4012985
## bay_dem_distancing_pre_post$`% over 125,000` 0.0144179
## bay_dem_distancing_pre_post$`% Asian` 0.0049485
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0054070
## bay_dem_distancing_pre_post$`percent with vehicles` 0.0069921
## bay_dem_distancing_pre_post$`percent high speed` 0.0005519
## Std. Error t value
## (Intercept) 0.0703295 -5.706
## bay_dem_distancing_pre_post$`% over 125,000` 0.0006685 21.566
## bay_dem_distancing_pre_post$`% Asian` 0.0004371 11.322
## bay_dem_distancing_pre_post$`percent associates or higher` 0.0006723 8.043
## bay_dem_distancing_pre_post$`percent with vehicles` 0.0007583 9.220
## bay_dem_distancing_pre_post$`percent high speed` 0.0008771 0.629
## Pr(>|t|)
## (Intercept) 1.23e-08 ***
## bay_dem_distancing_pre_post$`% over 125,000` < 2e-16 ***
## bay_dem_distancing_pre_post$`% Asian` < 2e-16 ***
## bay_dem_distancing_pre_post$`percent associates or higher` 1.10e-15 ***
## bay_dem_distancing_pre_post$`percent with vehicles` < 2e-16 ***
## bay_dem_distancing_pre_post$`percent high speed` 0.529
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5993 on 4737 degrees of freedom
## Multiple R-squared: 0.3664, Adjusted R-squared: 0.3657
## F-statistic: 547.8 on 5 and 4737 DF, p-value: < 2.2e-16
This model seems to capture the most variation, though it is only an improvement of about 1% of the variation predicted than the previous one with solely income, education, and Asian residents.